ohi logo
OHI-Northeast | OHI Science | Citation policy

1 Summary

2 Setup

knitr::opts_chunk$set(message = FALSE, warning = FALSE)

library(tidyverse)
library(readxl)
library(sf)
source("~/github/ne-prep/src/R/common.R")

3 Data Cleaning

raw <- read_excel(file.path(dir_anx, "_raw_data/NOAA_NMFS/catch_by_stat_area/Afflerbach_UCSB_Landings by Stat Area_MAR 2019.xlsx"))

clean <- raw %>%
  rename(year = YEAR,
         stat_area = `STAT\r\nAREA`,
         species = SPECIES,
         pounds = `LBS LANDED \r\n(HAIL WT)`,
         stock_id = `STOCK ID`,
         stock = `STOCK NAME`) %>%
  mutate(stat_area = as.numeric(stat_area))

head(clean)
## # A tibble: 6 x 6
##    year stat_area species                     pounds stock_id stock        
##   <dbl>     <dbl> <chr>                        <dbl> <chr>    <chr>        
## 1  1996         0 CONFIDENTIAL SPECIES        2.02e7 <NA>     <NA>         
## 2  1996       462 LOBSTER, AMERICAN           2.27e4 <NA>     <NA>         
## 3  1996       462 HAKE, WHITE                 2.19e3 HKWGMMA  White Hake   
## 4  1996       462 POLLOCK                     5.95e3 POKGMASS Pollock      
## 5  1996       462 CUSK                        1.06e3 <NA>     <NA>         
## 6  1996       462 FLOUNDER, WITCH / GRAY SO…  5.50e3 WITGMMA  Witch Flound…

4 Statistical areas

stat_shp <- sf::read_sf(file.path(dir_anx, "spatial/Statistical_Areas_2010_withNames.shp")) %>%
  st_set_crs(p4s_nad83) %>%
  st_transform(crs = crs(rgns)) 

stat_shp$stat_area <- st_area(stat_shp) #add area as column

plot(stat_shp["SHORT_NAME"])

Overlay stat areas to find what ones are in our NE regions

ohi_stat_areas <- st_intersection(rgns, stat_shp)
ohi_stat_areas$ohi_area <- st_area(ohi_stat_areas)

plot(ohi_stat_areas["Id"])

Calculate proportion of each statistical area in our OHI regions

calc_prop_area <- ohi_stat_areas %>%
  group_by(Id)  %>% #calculate the total statistical area region
  mutate(ohi_rgn_prop_area = ohi_area/stat_area) #this column tells us how much of each OHI sub-region falls within the statistical area in our region

plot(calc_prop_area["ohi_rgn_prop_area"])

5 Catch per region

Let’s filter the catch data to just the statistical areas in our region.

region_catch <- clean %>%
  filter(stat_area %in% ohi_stat_areas$Id) %>%
  left_join(calc_prop_area, by = c("stat_area" = "Id")) %>%
  mutate(catch = pounds*ohi_rgn_prop_area) %>%
  select(-area_km2, -FULL_NAME, -SHORT_NAME, -stat_area.y, -ohi_area, -NAFODIV)

head(region_catch, n = 20)
## # A tibble: 20 x 11
##     year stat_area species pounds stock_id stock rgn_name rgn_id
##    <dbl>     <dbl> <chr>    <dbl> <chr>    <chr> <fct>     <int>
##  1  1996       464 HAKE, …   4256 <NA>     <NA>  Gulf of…      3
##  2  1996       464 FLOUND…   2930 YELCCGM  CC/G… Gulf of…      3
##  3  1996       464 REDFIS…   3960 REDGMGB… Redf… Gulf of…      3
##  4  1996       464 CUSK     44111 <NA>     <NA>  Gulf of…      3
##  5  1996       464 COD      31842 CODGMSS  GOM … Gulf of…      3
##  6  1996       464 HAKE, …  25578 HKWGMMA  Whit… Gulf of…      3
##  7  1996       464 CRAB, … 200310 <NA>     <NA>  Gulf of…      3
##  8  1996       464 HADDOCK  12570 HADGM    GOM … Gulf of…      3
##  9  1996       464 MONKFI…  84788 <NA>     <NA>  Gulf of…      3
## 10  1996       464 FLOUND…  22237 WITGMMA  Witc… Gulf of…      3
## 11  1996       464 SKATE,…   2104 <NA>     <NA>  Gulf of…      3
## 12  1996       464 FLOUND…  13415 PLAGMMA  Plai… Gulf of…      3
## 13  1996       464 WOLFFI…   1481 WOLGMMA  Wolf… Gulf of…      3
## 14  1996       464 POLLOCK  30408 POKGMASS Poll… Gulf of…      3
## 15  1996       464 SCALLO…    427 <NA>     <NA>  Gulf of…      3
## 16  1996       464 HAKE, …  55368 HKSGMNGB Nort… Gulf of…      3
## 17  1996       464 LOBSTE… 439729 <NA>     <NA>  Gulf of…      3
## 18  1996       465 FLOUND…  50714 PLAGMMA  Plai… Gulf of…      3
## 19  1996       465 MONKFI…  73746 <NA>     <NA>  Gulf of…      3
## 20  1996       465 HAKE, …    907 HKSGMNGB Nort… Gulf of…      3
## # … with 3 more variables: ohi_rgn_prop_area <dbl>, geometry <POLYGON
## #   [m]>, catch <dbl>

Map one species

#black sea bass
bsb <- region_catch %>%
  filter(year == 2017, species == "BLACK SEA BASS") %>%
  group_by(rgn_id, stock_id, stock, species, year) %>%
  summarize(catch = sum(catch)) 

bsb_map <- rgns_simp %>%
  left_join(bsb, by = 'rgn_id')

ggplot(bsb_map) +
  geom_sf(aes(fill = catch))+
  theme_bw() +
  labs(title = "BLACK SEA BASS")

Visualize the data

sp_catch <- region_catch %>%
  group_by(species, stock, stock_id, year, rgn_name, rgn_id) %>%
  summarize(catch = sum(catch))

head(sp_catch, n = 30)
## # A tibble: 30 x 7
## # Groups:   species, stock, stock_id, year, rgn_name [30]
##    species stock stock_id  year rgn_name                    rgn_id    catch
##    <chr>   <chr> <chr>    <dbl> <fct>                        <int>    <dbl>
##  1 ALEWIFE <NA>  <NA>      1998 Gulf of Maine                    3 2627.   
##  2 ALEWIFE <NA>  <NA>      1998 Maine                            6  539.   
##  3 ALEWIFE <NA>  <NA>      1998 Massachusetts-Gulf of Maine      7    7.07 
##  4 ALEWIFE <NA>  <NA>      1998 New Hampshire                    9   47.8  
##  5 ALEWIFE <NA>  <NA>      1999 Gulf of Maine                    3   43.4  
##  6 ALEWIFE <NA>  <NA>      1999 Maine                            6    8.90 
##  7 ALEWIFE <NA>  <NA>      1999 Massachusetts-Gulf of Maine      7    0.117
##  8 ALEWIFE <NA>  <NA>      1999 New Hampshire                    9    0.790
##  9 ALEWIFE <NA>  <NA>      2000 Gulf of Maine                    3  241.   
## 10 ALEWIFE <NA>  <NA>      2000 Maine                            6   49.4  
## # … with 20 more rows
ggplot(sp_catch, aes(x = year, y = catch, color = species)) +
  facet_wrap(~rgn_name, scales = "free_y") +
  geom_line() +
  theme_bw() +
  theme(legend.position = 'none')

Create maps of catch for all species using just the most recent year

map_catch <- stat_shp %>%
  left_join(clean %>%
              filter(year == 2017), by = c("Id" = "stat_area")) %>%
  filter(!is.na(pounds))


for(i in 1:length(unique(map_catch$species))){

  sp <- unique(map_catch$species)[i]
  
ggplot() +
  geom_sf(data = map_catch%>%filter(species == sp), aes(fill = pounds)) +
  #geom_sf(data = rgns_simp, aes(), fill = NA) +
  theme_bw() +
  labs(title = sp)

sp_save_name <- ifelse(str_detect(sp, "/"), str_replace_all(sp, "/", "_"), sp)

ggsave(paste0("figs/", sp_save_name,"_catch_by_statistical_area.pdf"))
}

Calculate species catch per OHI region

#first get ohi regions and statistical area proportions 
catch_by_ohi_rgn <- region_catch %>%
  select(year,species, stock_id, stock, rgn_name, rgn_id, catch) %>%
  group_by(species, stock_id, stock, rgn_id, year, rgn_name) %>%
  summarize(catch = sum(catch)) %>%
  ungroup()

write.csv(catch_by_ohi_rgn, file = "data/nmfs_spatial_catch_by_ohi_rgn.csv")

Let’s look at total regional catch for each species (not stock)

#calculate total regional catch per species
species_catch <- catch_by_ohi_rgn %>%
  group_by(species, year) %>%
  summarize(sp_catch = sum(catch)) %>%
  ungroup() %>%
  group_by(year) %>%
  mutate(yr_catch = sum(sp_catch),
         catch_prop = sp_catch/yr_catch) %>%
  ungroup() %>%
  filter(year > 2004) 
ggplot(species_catch, aes(x = year, y = catch_prop, fill = species)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(legend.text = element_text(size = 6))

plotly::ggplotly()

Clearly atlantic herring is making up the majority of catch!


There are 185. Way more than we have stock information for. Let’s see what species also have stock ids

stocks <- catch_by_ohi_rgn %>%
  filter(!is.na(stock_id)) %>%
  select(species, stock, stock_id) %>%
  distinct()

stocks
## # A tibble: 28 x 3
##    species                                 stock                  stock_id
##    <chr>                                   <chr>                  <chr>   
##  1 COD                                     GB Cod East            CODGBE  
##  2 COD                                     GB Cod West            CODGBW  
##  3 COD                                     GOM Cod                CODGMSS 
##  4 FLOUNDER, AMERICAN PLAICE /DAB          Plaice                 PLAGMMA 
##  5 FLOUNDER, SAND-DAB / WINDOWPANE / BRILL Southern Windowpane    FLDSNEMA
##  6 FLOUNDER, SAND-DAB / WINDOWPANE / BRILL Northern Windowpane    FLGMGBSS
##  7 FLOUNDER, WINTER / BLACKBACK            GB Winter Flounder     FLWGB   
##  8 FLOUNDER, WINTER / BLACKBACK            GOM Winter Flounder    FLWGMSS 
##  9 FLOUNDER, WINTER / BLACKBACK            SNE/MA Winter Flounder FLWSNEMA
## 10 FLOUNDER, WITCH / GRAY SOLE             Witch Flounder         WITGMMA 
## # … with 18 more rows
ggplot(catch_by_ohi_rgn %>%
  filter(!is.na(stock_id)), aes(x = year, y = catch, color = stock_id)) +
  geom_line() +
  facet_wrap(~rgn_name, scales = "free_y") +
  theme_bw() +
  theme(legend.text = element_text(size = 7))


We have records for 0 catch. But what about the missing data? Let’s look at AMBERJACK, SPECIES NOT SPECIFIED.

amb <- clean %>%
  filter(species == "AMBERJACK, SPECIES NOT SPECIFIED")
unique(amb$year)
## [1] 1998 2000 2002 2004 2005 2007 2012 2017

Ok clearly we are missing 1999, 2001, 2003, 2006, 09-11, 13-16.

6 Gapfill

We need to gapfill this missing data. When a species/state combination has missing data for a year, we can not assume it has a catch of 0. Since we calculate a rolling average of catch, NAs will remain as NA’s and the average will rely on just one or two years of catch. This is done to account for any wild fluctuations in catch year to year.

toolbox_data <- catch_by_ohi_rgn %>%
  group_by(rgn_id, rgn_name, species, stock, stock_id) %>%
  complete(year = 1998:2017) %>%
  arrange(year) %>%
  mutate(mean_catch = zoo::rollapply(catch, 3, mean, fill = NA, align = 'right')) %>% ## create a new column `mean_catch` with rolling mean of 3 yrs
  filter(year > 2004) %>%
  select(year, rgn_id, rgn_name, species, stock, stock_id, mean_catch) %>%
  ungroup()

ggplot(toolbox_data, aes(x = year, y = mean_catch, color = species)) +
  geom_line() +
  facet_wrap(~rgn_name, scales = "free_y") +
  theme_bw() +
  theme(legend.text = element_text(size = 4),
        legend.position = "below")

plotly::ggplotly()

7 Save to toolbox

# save to toolbox
write.csv(toolbox_data, file = file.path(dir_calc, "layers/fis_meancatch.csv"))